Wilcoxon Test
plot-state: single normal sample from PC (BRCA1 vs TN)
# Summed deletion states per cell group: BRCA1 tumors (TN_B1) vs TN tumors (TN).
# Both infercnv HMM prediction tables are reduced to deletion calls
# (state < 3), the state values are summed within each cell_group_name, and one
# violin per sample is drawn.
# NOTE(review): if these are infercnv i6 states (1 = complete loss, 2 = loss of
# one copy), summing raw states weights a complete loss less than a partial
# one -- confirm this is the intended burden metric.

# extract data from brca
# header = TRUE makes read.table consume the header line itself, so columns are
# parsed with their real types and the first row no longer has to be dropped
# by hand.
b17 <- read.table(
  file = "~/brca-infercnv/brca_output_dir_min_1_norm/17_HMM_predHMMi6.leiden.hmm_mode-subclusters.pred_cnv_genes.dat",
  header = TRUE,
  col.names = c("cell_group_name", "gene_region_name", "state", "gene", "chr", "start", "end")
)
# deletions: one row per cell group carrying that group's summed state
b17del <- b17 %>%
  mutate(
    # sample name = everything before the first "." in the group name
    `Sample Name` = gsub("\\..*", "", cell_group_name),
    state = as.numeric(state)
  ) %>%
  filter(state < 3) %>%
  group_by(cell_group_name) %>%
  mutate(`Summed States` = sum(state)) %>%
  ungroup() %>%
  distinct(cell_group_name, .keep_all = TRUE)

# extract data from tn
t17 <- read.table(
  file = "~/brca-infercnv/tn_output_dir_subset_1_norm/17_HMM_predHMMi6.leiden.hmm_mode-subclusters.pred_cnv_genes.dat",
  header = TRUE,
  col.names = c("cell_group_name", "gene_region_name", "state", "gene", "chr", "start", "end")
)
# deletions
t17del <- t17 %>%
  mutate(
    `Sample Name` = gsub("\\..*", "", cell_group_name),
    state = as.numeric(state)
  ) %>%
  filter(state < 3) %>%
  group_by(cell_group_name) %>%
  mutate(`Summed States` = sum(state)) %>%
  ungroup() %>%
  distinct(cell_group_name, .keep_all = TRUE)

# combine dfs
del <- bind_rows(b17del, t17del)

# generate plot
# The jitter width is set through position_jitter(); geom_point() has no
# `width` argument, so the original value was ignored. height = 0 keeps every
# point at its true y value. The original call passed `size` twice; the first
# value (0.1) is kept -- NOTE(review): confirm 0.1 rather than 2 is intended.
plot <- ggplot(del, aes(x = `Sample Name`, y = `Summed States`, fill = `Sample Name`)) +
  geom_violin(scale = "width", adjust = 1.5) +
  geom_point(
    position = position_jitter(width = 0.2, height = 0),
    size = 0.1, alpha = 0.7, shape = 21, color = "black"
  ) +
  labs(
    title = "Violin Plot of Copy Number Deletions",
    x = "BRCA1 Tumors (TN_B1) and TN tumors (TN)",
    y = "Sum of Deletions"
  ) +
  scale_fill_brewer(palette = "Set3") +
  theme_minimal(base_size = 15) +
  # all theme tweaks come after theme_minimal(), which would otherwise reset them
  theme(
    legend.position = "none",
    panel.border = element_rect(fill = NA, color = "black", linewidth = 1.5),
    strip.background = element_rect(fill = "grey90", color = "black", linewidth = 1.5),
    panel.spacing = unit(1, "lines"),
    plot.title = element_text(face = "bold", hjust = 0.5),
    axis.title.x = element_text(face = "bold"),
    axis.title.y = element_text(face = "bold"),
    axis.text.x = element_text(angle = 45, hjust = 1)
  )
# output plot
plot

state: single wilcoxon test
# Two-sided Wilcoxon rank-sum test on the per-group summed deletion states:
# BRCA1 tumors vs TN tumors.

# BRCA1 tumor groups. Every row that survives the filter is a TN_B1 sample, so
# the genotype label is assigned directly.
b1.names <- c("TN_B1_0131", "TN_B1_0177", "TN_B1_4031", "TN_B1_0554")
b1del <- b17del %>%
  filter(`Sample Name` %in% b1.names) %>%
  mutate(
    genotype = "BRCA1 Tumor",
    # numeric copy of the summed states (replaces the per-row state column)
    state = as.numeric(`Summed States`)
  )

# TN tumor groups
tn.names <- c("TN_0106", "TN_0126", "TN_0114", "TN_0135")
tndel <- t17del %>%
  filter(`Sample Name` %in% tn.names) %>%
  mutate(
    genotype = "TN Tumor",
    state = as.numeric(`Summed States`)
  )

# wilcox test for tumors
w.tum.st <- wilcox.test(b1del$`Summed States`, tndel$state, alternative = "two.sided")
w.tum.st # W = 21200, p-value < 2.2e-16
##
## Wilcoxon rank sum test with continuity correction
##
## data: b1del$`Summed States` and tndel$state
## W = 21200, p-value < 2.2e-16
## alternative hypothesis: true location shift is not equal to 0
plot-length: single normal sample from PC (BRCA1 vs TN)
# Summed deletion length per cell group: BRCA1 tumors (TN_B1) vs TN tumors (TN).
# Deletion calls (state < 3) are kept, (end - start) / 1e6 is summed within
# each cell_group_name, and one violin per sample is drawn.
# NOTE(review): the BRCA1 table is read from tn_b1_epi_output_dir_subset_min
# here, while the matching state section reads brca_output_dir_min_1_norm --
# confirm both sections are meant to use the same infercnv run.

# extract data from brca
# header = TRUE makes read.table consume the header line itself, so columns are
# parsed with their real types and the first row no longer has to be dropped
# by hand.
b17 <- read.table(
  file = "~/brca-infercnv/tn_b1_epi_output_dir_subset_min/17_HMM_predHMMi6.leiden.hmm_mode-subclusters.pred_cnv_genes.dat",
  header = TRUE,
  col.names = c("cell_group_name", "gene_region_name", "state", "gene", "chr", "start", "end")
)
# deletions: one row per cell group carrying that group's summed length
b17del <- b17 %>%
  filter(as.numeric(state) < 3) %>%
  mutate(
    # sample name = everything before the first "." in the group name
    `Sample Name` = gsub("\\..*", "", cell_group_name),
    Length = (as.numeric(end) - as.numeric(start)) / 1e6
  ) %>%
  group_by(cell_group_name) %>%
  mutate(`Summed Length` = sum(Length)) %>%
  ungroup() %>%
  distinct(cell_group_name, .keep_all = TRUE)

# extract data from tn
t17 <- read.table(
  file = "~/brca-infercnv/tn_output_dir_subset_1_norm/17_HMM_predHMMi6.leiden.hmm_mode-subclusters.pred_cnv_genes.dat",
  header = TRUE,
  col.names = c("cell_group_name", "gene_region_name", "state", "gene", "chr", "start", "end")
)
# deletions
t17del <- t17 %>%
  filter(as.numeric(state) < 3) %>%
  mutate(
    `Sample Name` = gsub("\\..*", "", cell_group_name),
    Length = (as.numeric(end) - as.numeric(start)) / 1e6
  ) %>%
  group_by(cell_group_name) %>%
  mutate(`Summed Length` = sum(Length)) %>%
  ungroup() %>%
  distinct(cell_group_name, .keep_all = TRUE)

# combine dfs
del <- bind_rows(b17del, t17del)

# violin plot
# The jitter width is set through position_jitter(); geom_point() has no
# `width` argument, so the original value was ignored. height = 0 keeps every
# point at its true y value. The original call passed `size` twice; the first
# value (0.1) is kept -- NOTE(review): confirm 0.1 rather than 2 is intended.
plot <- ggplot(del, aes(x = `Sample Name`, y = `Summed Length`, fill = `Sample Name`)) +
  geom_violin(scale = "width", adjust = 1.5) +
  geom_point(
    position = position_jitter(width = 0.2, height = 0),
    size = 0.1, alpha = 0.7, shape = 21, color = "black"
  ) +
  labs(
    title = "Violin Plot of Copy Number Deletions",
    x = "BRCA1 Tumors (TN_B1) and TN tumors (TN)",
    y = "Summed Length of Deletions"
  ) +
  scale_fill_brewer(palette = "Set3") +
  theme_minimal(base_size = 15) +
  # all theme tweaks come after theme_minimal(), which would otherwise reset them
  theme(
    legend.position = "none",
    panel.border = element_rect(fill = NA, color = "black", linewidth = 1.5),
    strip.background = element_rect(fill = "grey90", color = "black", linewidth = 1.5),
    panel.spacing = unit(1, "lines"),
    plot.title = element_text(face = "bold", hjust = 0.5),
    axis.title.x = element_text(face = "bold"),
    axis.title.y = element_text(face = "bold"),
    axis.text.x = element_text(angle = 45, hjust = 1)
  )
# plot
plot

length: single wilcoxon test
# Two-sided Wilcoxon rank-sum test on the per-group summed deletion length:
# BRCA1 tumors vs TN tumors.

# BRCA1 tumor groups. Every row that survives the filter is a TN_B1 sample, so
# the genotype label is assigned directly.
b1.names <- c("TN_B1_0131", "TN_B1_0177", "TN_B1_4031", "TN_B1_0554")
b1del <- b17del %>%
  filter(`Sample Name` %in% b1.names) %>%
  mutate(
    genotype = "BRCA1 Tumor",
    `Summed Length` = as.numeric(`Summed Length`)
  )

# TN tumor groups
tn.names <- c("TN_0106", "TN_0126", "TN_0114", "TN_0135")
tndel <- t17del %>%
  filter(`Sample Name` %in% tn.names) %>%
  mutate(
    genotype = "TN Tumor",
    `Summed Length` = as.numeric(`Summed Length`)
  )

# wilcox test for tumors
w.tum.ln <- wilcox.test(b1del$`Summed Length`, tndel$`Summed Length`, alternative = "two.sided")
w.tum.ln # W = 23113, p-value = 4.962e-14
##
## Wilcoxon rank sum test with continuity correction
##
## data: b1del$`Summed Length` and tndel$`Summed Length`
## W = 23113, p-value = 4.962e-14
## alternative hypothesis: true location shift is not equal to 0
plot-state: multiple samples from PC (B1 vs pre-neoplastic)
# Summed deletion states per cell group: preneoplastic tissue (B1) and BRCA1
# tumors (TN_B1), both taken from a single infercnv run.
# Deletion calls (state < 3) are kept, the state values are summed within each
# cell_group_name, and one violin per sample is drawn.
# NOTE(review): if these are infercnv i6 states (1 = complete loss, 2 = loss of
# one copy), summing raw states weights a complete loss less than a partial
# one -- confirm this is the intended burden metric.

# extract data from brca
# header = TRUE makes read.table consume the header line itself, so columns are
# parsed with their real types and the first row no longer has to be dropped
# by hand.
b17 <- read.table(
  file = "~/brca-infercnv/brca_output_dir_pc_multi/17_HMM_predHMMi6.leiden.hmm_mode-subclusters.pred_cnv_genes.dat",
  header = TRUE,
  col.names = c("cell_group_name", "gene_region_name", "state", "gene", "chr", "start", "end")
)
# deletions: one row per cell group carrying that group's summed state
b17del <- b17 %>%
  mutate(
    # sample name = everything before the first "." in the group name
    `Sample Name` = gsub("\\..*", "", cell_group_name),
    state = as.numeric(state)
  ) %>%
  filter(state < 3) %>%
  group_by(cell_group_name) %>%
  mutate(`Summed States` = sum(state)) %>%
  ungroup() %>%
  distinct(cell_group_name, .keep_all = TRUE)

# generate plot
# The jitter width is set through position_jitter(); geom_point() has no
# `width` argument, so the original value was ignored. height = 0 keeps every
# point at its true y value. The original call passed `size` twice; the first
# value (0.1) is kept -- NOTE(review): confirm 0.1 rather than 2 is intended.
bplot <- ggplot(b17del, aes(x = `Sample Name`, y = `Summed States`, fill = `Sample Name`)) +
  geom_violin(scale = "width", adjust = 1.5) +
  geom_point(
    position = position_jitter(width = 0.2, height = 0),
    size = 0.1, alpha = 0.7, shape = 21, color = "black"
  ) +
  labs(
    title = "Violin Plot of Copy Number Deletions",
    x = "Preneoplastic tissue (B1) and BRCA1 tumors (TN_B1)",
    y = "Sum of Deletions"
  ) +
  scale_fill_brewer(palette = "Set3") +
  theme_minimal(base_size = 15) +
  # all theme tweaks come after theme_minimal(), which would otherwise reset them
  theme(
    legend.position = "none",
    panel.border = element_rect(fill = NA, color = "black", linewidth = 1.5),
    strip.background = element_rect(fill = "grey90", color = "black", linewidth = 1.5),
    panel.spacing = unit(1, "lines"),
    plot.title = element_text(face = "bold", hjust = 0.5),
    axis.title.x = element_text(face = "bold"),
    axis.title.y = element_text(face = "bold"),
    axis.text.x = element_text(angle = 45, hjust = 1)
  )
# output plot
bplot

state: multiple wilcoxon test
# Two-sided Wilcoxon rank-sum test on the per-group summed deletion states:
# BRCA1 tumors vs BRCA1 preneoplastic tissue (both subsets of b17del).

# BRCA1 tumor groups. Every row that survives the filter is a TN_B1 sample, so
# the genotype label is assigned directly.
b1.names <- c("TN_B1_0131", "TN_B1_0177", "TN_B1_4031", "TN_B1_0554")
b1del <- b17del %>%
  filter(`Sample Name` %in% b1.names) %>%
  mutate(
    genotype = "BRCA1 Tumor",
    `Summed States` = as.numeric(`Summed States`)
  )

# preneoplastic groups; all names in pn.names start with "B1"
pn.names <- c("B1_0894", "B1_0033", "B1_0023", "B1_0090")
pndel <- b17del %>%
  filter(`Sample Name` %in% pn.names) %>%
  mutate(
    genotype = "BRCA1 Preneoplastic",
    `Summed States` = as.numeric(`Summed States`)
  )

# wilcox test for brca carriers
w.brca.st <- wilcox.test(b1del$`Summed States`, pndel$`Summed States`, alternative = "two.sided")
w.brca.st # W = 156334, p-value < 2.2e-16
##
## Wilcoxon rank sum test with continuity correction
##
## data: b1del$`Summed States` and pndel$`Summed States`
## W = 156334, p-value < 2.2e-16
## alternative hypothesis: true location shift is not equal to 0
plot-length: multiple samples from PC (B1 vs pre-neoplastic)
# Summed deletion length per cell group: preneoplastic tissue (B1) and BRCA1
# tumors (TN_B1), both taken from a single infercnv run.
# Deletion calls (state < 3) are kept, (end - start) / 1e6 is summed within
# each cell_group_name, and one violin per sample is drawn.

# extract data from brca
# header = TRUE makes read.table consume the header line itself, so columns are
# parsed with their real types and the first row no longer has to be dropped
# by hand.
b17 <- read.table(
  file = "~/brca-infercnv/brca_output_dir_pc_multi/17_HMM_predHMMi6.leiden.hmm_mode-subclusters.pred_cnv_genes.dat",
  header = TRUE,
  col.names = c("cell_group_name", "gene_region_name", "state", "gene", "chr", "start", "end")
)
# deletions: one row per cell group carrying that group's summed length
b17del <- b17 %>%
  filter(as.numeric(state) < 3) %>%
  mutate(
    # sample name = everything before the first "." in the group name
    `Sample Name` = gsub("\\..*", "", cell_group_name),
    Length = (as.numeric(end) - as.numeric(start)) / 1e6
  ) %>%
  group_by(cell_group_name) %>%
  mutate(`Summed Length` = sum(Length)) %>%
  ungroup() %>%
  distinct(cell_group_name, .keep_all = TRUE)

# violin plot
# The jitter width is set through position_jitter(); geom_point() has no
# `width` argument, so the original value was ignored. height = 0 keeps every
# point at its true y value. The original call passed `size` twice; the first
# value (0.1) is kept -- NOTE(review): confirm 0.1 rather than 2 is intended.
bplot <- ggplot(b17del, aes(x = `Sample Name`, y = `Summed Length`, fill = `Sample Name`)) +
  geom_violin(scale = "width", adjust = 1.5) +
  geom_point(
    position = position_jitter(width = 0.2, height = 0),
    size = 0.1, alpha = 0.7, shape = 21, color = "black"
  ) +
  labs(
    title = "Violin Plot of Copy Number Deletions",
    x = "Preneoplastic tissue (B1) and BRCA1 tumors (TN_B1)",
    y = "Summed Length of Deletions"
  ) +
  scale_fill_brewer(palette = "Set3") +
  theme_minimal(base_size = 15) +
  # all theme tweaks come after theme_minimal(), which would otherwise reset them
  theme(
    legend.position = "none",
    panel.border = element_rect(fill = NA, color = "black", linewidth = 1.5),
    strip.background = element_rect(fill = "grey90", color = "black", linewidth = 1.5),
    panel.spacing = unit(1, "lines"),
    plot.title = element_text(face = "bold", hjust = 0.5),
    axis.title.x = element_text(face = "bold"),
    axis.title.y = element_text(face = "bold"),
    axis.text.x = element_text(angle = 45, hjust = 1)
  )
# plot
bplot

length: multiple wilcoxon test
# Two-sided Wilcoxon rank-sum test on the per-group summed deletion length:
# BRCA1 tumors vs BRCA1 preneoplastic tissue (both subsets of b17del).

# BRCA1 tumor groups. Every row that survives the filter is a TN_B1 sample, so
# the genotype label is assigned directly.
b1.names <- c("TN_B1_0131", "TN_B1_0177", "TN_B1_4031", "TN_B1_0554")
b1del <- b17del %>%
  filter(`Sample Name` %in% b1.names) %>%
  mutate(
    genotype = "BRCA1 Tumor",
    `Summed Length` = as.numeric(`Summed Length`)
  )

# preneoplastic groups; all names in pn.names start with "B1"
pn.names <- c("B1_0894", "B1_0033", "B1_0023", "B1_0090")
pndel <- b17del %>%
  filter(`Sample Name` %in% pn.names) %>%
  mutate(
    genotype = "BRCA1 Preneoplastic",
    `Summed Length` = as.numeric(`Summed Length`)
  )

# wilcox test for brca carriers
w.brca.ln <- wilcox.test(b1del$`Summed Length`, pndel$`Summed Length`, alternative = "two.sided")
w.brca.ln # W = 157272, p-value < 2.2e-16
##
## Wilcoxon rank sum test with continuity correction
##
## data: b1del$`Summed Length` and pndel$`Summed Length`
## W = 157272, p-value < 2.2e-16
## alternative hypothesis: true location shift is not equal to 0
plot-state: multiple samples from PC (TN vs normal premenopausal)
# Summed deletion states per cell group: premenopausal tissue (N) and TN
# tumors (TN), both taken from a single infercnv run.
# Deletion calls (state < 3) are kept, the state values are summed within each
# cell_group_name, and one violin per sample is drawn.
# NOTE(review): if these are infercnv i6 states (1 = complete loss, 2 = loss of
# one copy), summing raw states weights a complete loss less than a partial
# one -- confirm this is the intended burden metric.

# extract data from the TN / premenopausal run
# header = TRUE makes read.table consume the header line itself, so columns are
# parsed with their real types and the first row no longer has to be dropped
# by hand.
t17 <- read.table(
  file = "~/brca-infercnv/tp_subset_output_dir/17_HMM_predHMMi6.leiden.hmm_mode-subclusters.pred_cnv_genes.dat",
  header = TRUE,
  col.names = c("cell_group_name", "gene_region_name", "state", "gene", "chr", "start", "end")
)
# deletions: one row per cell group carrying that group's summed state
t17del <- t17 %>%
  mutate(
    # sample name = everything before the first "." in the group name
    `Sample Name` = gsub("\\..*", "", cell_group_name),
    state = as.numeric(state)
  ) %>%
  filter(state < 3) %>%
  group_by(cell_group_name) %>%
  mutate(`Summed States` = sum(state)) %>%
  ungroup() %>%
  distinct(cell_group_name, .keep_all = TRUE)

# generate plot
# The jitter width is set through position_jitter(); geom_point() has no
# `width` argument, so the original value was ignored. height = 0 keeps every
# point at its true y value. The original call passed `size` twice; the first
# value (0.1) is kept -- NOTE(review): confirm 0.1 rather than 2 is intended.
tplot <- ggplot(t17del, aes(x = `Sample Name`, y = `Summed States`, fill = `Sample Name`)) +
  geom_violin(scale = "width", adjust = 1.5) +
  geom_point(
    position = position_jitter(width = 0.2, height = 0),
    size = 0.1, alpha = 0.7, shape = 21, color = "black"
  ) +
  labs(
    title = "Violin Plot of Copy Number Deletions",
    x = "Premenopausal tissue (N) and TN Tumors (TN)",
    y = "Sum of Deletions"
  ) +
  scale_fill_brewer(palette = "Set3") +
  theme_minimal(base_size = 15) +
  # all theme tweaks come after theme_minimal(), which would otherwise reset them
  theme(
    legend.position = "none",
    panel.border = element_rect(fill = NA, color = "black", linewidth = 1.5),
    strip.background = element_rect(fill = "grey90", color = "black", linewidth = 1.5),
    panel.spacing = unit(1, "lines"),
    plot.title = element_text(face = "bold", hjust = 0.5),
    axis.title.x = element_text(face = "bold"),
    axis.title.y = element_text(face = "bold"),
    axis.text.x = element_text(angle = 45, hjust = 1)
  )
# output plot
tplot

state: multiple wilcoxon test
# Two-sided Wilcoxon rank-sum test on the per-group summed deletion states:
# TN tumors vs human premenopausal tissue (both subsets of t17del).

# TN tumor groups. Every row that survives the filter is a TN sample, so the
# genotype label is assigned directly.
tn.names <- c("TN_0106", "TN_0126", "TN_0114", "TN_0135")
tndel <- t17del %>%
  filter(`Sample Name` %in% tn.names) %>%
  mutate(
    genotype = "TN Tumor",
    `Summed States` = as.numeric(`Summed States`)
  )

# premenopausal groups; all names in tp.names start with "N"
tp.names <- c("N_0019", "N_0233", "N_0092", "N_0093", "N_0123", "N_0064", "N_0169")
tpdel <- t17del %>%
  filter(`Sample Name` %in% tp.names) %>%
  mutate(
    genotype = "Human Premenopausal",
    `Summed States` = as.numeric(`Summed States`)
  )

# wilcox test: TN tumors vs premenopausal tissue
w.tn.st <- wilcox.test(tndel$`Summed States`, tpdel$`Summed States`, alternative = "two.sided")
# Inline result updated to match the rendered output of this chunk; the earlier
# note (W = 1126640457, p-value = 0.6612) did not match it.
w.tn.st # W = 14274, p-value = 4.961e-06
##
## Wilcoxon rank sum test with continuity correction
##
## data: tndel$`Summed States` and tpdel$`Summed States`
## W = 14274, p-value = 4.961e-06
## alternative hypothesis: true location shift is not equal to 0
plot-length: multiple samples from PC (TN vs normal premenopausal)
# Summed deletion length per cell group: premenopausal tissue (N) and TN
# tumors (TN), both taken from a single infercnv run.
# Deletion calls (state < 3) are kept, (end - start) / 1e6 is summed within
# each cell_group_name, and one violin per sample is drawn.

# extract data from the TN / premenopausal run
# header = TRUE makes read.table consume the header line itself, so columns are
# parsed with their real types and the first row no longer has to be dropped
# by hand.
t17 <- read.table(
  file = "~/brca-infercnv/tp_subset_output_dir/17_HMM_predHMMi6.leiden.hmm_mode-subclusters.pred_cnv_genes.dat",
  header = TRUE,
  col.names = c("cell_group_name", "gene_region_name", "state", "gene", "chr", "start", "end")
)
# deletions: one row per cell group carrying that group's summed length
t17del <- t17 %>%
  filter(as.numeric(state) < 3) %>%
  mutate(
    # sample name = everything before the first "." in the group name
    `Sample Name` = gsub("\\..*", "", cell_group_name),
    Length = (as.numeric(end) - as.numeric(start)) / 1e6
  ) %>%
  group_by(cell_group_name) %>%
  mutate(`Summed Length` = sum(Length)) %>%
  ungroup() %>%
  distinct(cell_group_name, .keep_all = TRUE)

# violin plot
# The jitter width is set through position_jitter(); geom_point() has no
# `width` argument, so the original value was ignored. height = 0 keeps every
# point at its true y value. The original call passed `size` twice; the first
# value (0.1) is kept -- NOTE(review): confirm 0.1 rather than 2 is intended.
tplot <- ggplot(t17del, aes(x = `Sample Name`, y = `Summed Length`, fill = `Sample Name`)) +
  geom_violin(scale = "width", adjust = 1.5) +
  geom_point(
    position = position_jitter(width = 0.2, height = 0),
    size = 0.1, alpha = 0.7, shape = 21, color = "black"
  ) +
  labs(
    title = "Violin Plot of Copy Number Deletions",
    x = "Premenopausal tissue (N) and TN Tumors (TN)",
    y = "Summed Length of Deletions"
  ) +
  scale_fill_brewer(palette = "Set3") +
  theme_minimal(base_size = 15) +
  # all theme tweaks come after theme_minimal(), which would otherwise reset them
  theme(
    legend.position = "none",
    panel.border = element_rect(fill = NA, color = "black", linewidth = 1.5),
    strip.background = element_rect(fill = "grey90", color = "black", linewidth = 1.5),
    panel.spacing = unit(1, "lines"),
    plot.title = element_text(face = "bold", hjust = 0.5),
    axis.title.x = element_text(face = "bold"),
    axis.title.y = element_text(face = "bold"),
    axis.text.x = element_text(angle = 45, hjust = 1)
  )
# plot
tplot

length: multiple wilcoxon test
# Two-sided Wilcoxon rank-sum test on the per-group summed deletion length:
# TN tumors vs human premenopausal tissue (both subsets of t17del).

# tn df. Every row that survives the filter is a TN sample, so the genotype
# label is assigned directly.
tn.names <- c("TN_0106", "TN_0126", "TN_0114", "TN_0135")
tndel <- t17del %>%
  filter(`Sample Name` %in% tn.names) %>%
  mutate(
    genotype = "TN Tumor",
    `Summed Length` = as.numeric(`Summed Length`)
  )

# tp df; all names in tp.names start with "N"
tp.names <- c("N_0019", "N_0233", "N_0092", "N_0093", "N_0123", "N_0064", "N_0169")
tpdel <- t17del %>%
  filter(`Sample Name` %in% tp.names) %>%
  mutate(
    genotype = "Human Premenopausal",
    `Summed Length` = as.numeric(`Summed Length`)
  )

# wilcox test: TN tumors vs premenopausal tissue
w.tn.ln <- wilcox.test(tndel$`Summed Length`, tpdel$`Summed Length`, alternative = "two.sided")
# Inline result updated to match the rendered output of this chunk; the earlier
# note (W = 1239947246, p-value < 2.2e-16) did not match it.
w.tn.ln # W = 14887, p-value = 9.738e-08
##
## Wilcoxon rank sum test with continuity correction
##
## data: tndel$`Summed Length` and tpdel$`Summed Length`
## W = 14887, p-value = 9.738e-08
## alternative hypothesis: true location shift is not equal to 0
plot-state: single normal sample from PC (Pre-neo vs Human Premeno)
# Summed deletion states per cell group: BRCA1 preneoplastic tissue (B1) vs
# human premenopausal tissue (N), read from two separate infercnv runs.
# Deletion calls (state < 3) are kept, the state values are summed within each
# cell_group_name, and one violin per sample is drawn.
# NOTE(review): if these are infercnv i6 states (1 = complete loss, 2 = loss of
# one copy), summing raw states weights a complete loss less than a partial
# one -- confirm this is the intended burden metric.

# load preneoplastic data
# header = TRUE makes read.table consume the header line itself, so columns are
# parsed with their real types and the first row no longer has to be dropped
# by hand.
pn17 <- read.table(
  file = "~/brca-infercnv/pn_output_dir_1_norm/17_HMM_predHMMi6.leiden.hmm_mode-subclusters.pred_cnv_genes.dat",
  header = TRUE,
  col.names = c("cell_group_name", "gene_region_name", "state", "gene", "chr", "start", "end")
)
# deletions: one row per cell group carrying that group's summed state
pn17del <- pn17 %>%
  mutate(
    # sample name = everything before the first "." in the group name
    `Sample Name` = gsub("\\..*", "", cell_group_name),
    state = as.numeric(state)
  ) %>%
  filter(state < 3) %>%
  group_by(cell_group_name) %>%
  mutate(`Summed States` = sum(state)) %>%
  ungroup() %>%
  distinct(cell_group_name, .keep_all = TRUE)

# load premenopausal data
tp17 <- read.table(
  file = "~/brca-infercnv/tp_output_dir_05_13/17_HMM_predHMMi6.leiden.hmm_mode-subclusters.pred_cnv_genes.dat",
  header = TRUE,
  col.names = c("cell_group_name", "gene_region_name", "state", "gene", "chr", "start", "end")
)
# premenopausal samples kept for this comparison
tp.names <- c(
  "N_0019", "N_0233", "N_0092", "N_0093", "N_0123",
  "N_0064", "N_0169"
)
# deletions, restricted to the samples in tp.names
tp17del <- tp17 %>%
  mutate(
    `Sample Name` = gsub("\\..*", "", cell_group_name),
    state = as.numeric(state)
  ) %>%
  filter(state < 3) %>%
  group_by(cell_group_name) %>%
  mutate(`Summed States` = sum(state)) %>%
  ungroup() %>%
  filter(`Sample Name` %in% tp.names) %>%
  distinct(cell_group_name, .keep_all = TRUE)

# combine dfs
norm17del <- bind_rows(pn17del, tp17del)

# generate plot
# The jitter width is set through position_jitter(); geom_point() has no
# `width` argument, so the original value was ignored. height = 0 keeps every
# point at its true y value. The original call passed `size` twice; the first
# value (0.1) is kept -- NOTE(review): confirm 0.1 rather than 2 is intended.
normplot <- ggplot(norm17del, aes(x = `Sample Name`, y = `Summed States`, fill = `Sample Name`)) +
  geom_violin(scale = "width", adjust = 1.5) +
  geom_point(
    position = position_jitter(width = 0.2, height = 0),
    size = 0.1, alpha = 0.7, shape = 21, color = "black"
  ) +
  labs(
    title = "Violin Plot of Copy Number Deletions",
    x = "Preneoplastic Tissue (B1) and Premenopausal Tissue (N)",
    y = "Sum of Deletions"
  ) +
  scale_fill_brewer(palette = "Set3") +
  theme_minimal(base_size = 15) +
  # all theme tweaks come after theme_minimal(), which would otherwise reset them
  theme(
    legend.position = "none",
    panel.border = element_rect(fill = NA, color = "black", linewidth = 1.5),
    strip.background = element_rect(fill = "grey90", color = "black", linewidth = 1.5),
    panel.spacing = unit(1, "lines"),
    plot.title = element_text(face = "bold", hjust = 0.5),
    axis.title.x = element_text(face = "bold"),
    axis.title.y = element_text(face = "bold"),
    axis.text.x = element_text(angle = 45, hjust = 1)
  )
# output plot
normplot

state: single wilcoxon test
# Two-sided Wilcoxon rank-sum test on the per-group summed deletion states:
# BRCA1 preneoplastic tissue vs human premenopausal tissue.

# preneoplastic groups. All names in pn.names start with "B1", so the genotype
# label is assigned directly after the filter.
pn.names <- c("B1_0894", "B1_0033", "B1_0023", "B1_0090")
pndel <- pn17del %>%
  filter(`Sample Name` %in% pn.names) %>%
  mutate(
    genotype = "BRCA1 Preneoplastic",
    `Summed States` = as.numeric(`Summed States`)
  )

# premenopausal groups; all names in tp.names start with "N"
tp.names <- c(
  "N_0019", "N_0233", "N_0092", "N_0093", "N_0123",
  "N_0064", "N_0169"
)
tpdel <- tp17del %>%
  filter(`Sample Name` %in% tp.names) %>%
  mutate(
    genotype = "Human Premenopausal",
    `Summed States` = as.numeric(`Summed States`)
  )

# wilcox test: preneoplastic vs premenopausal tissue
w.norm.st <- wilcox.test(pndel$`Summed States`, tpdel$`Summed States`, alternative = "two.sided")
# Inline result updated to match the rendered output of this chunk; the earlier
# note (W = 96990, p-value = 0.4548) did not match it.
w.norm.st # W = 103300, p-value = 0.741
##
## Wilcoxon rank sum test with continuity correction
##
## data: pndel$`Summed States` and tpdel$`Summed States`
## W = 103300, p-value = 0.741
## alternative hypothesis: true location shift is not equal to 0
plot-length: single normal sample from PC (Pre-neo vs Human Premeno)
# Summed deletion length per cell group: BRCA1 preneoplastic tissue (B1) vs
# human premenopausal tissue (N), read from two separate infercnv runs.
# Deletion calls (state < 3) are kept, (end - start) / 1e6 is summed within
# each cell_group_name, and one violin per sample is drawn.

# extract data from brca1 preneo
# header = TRUE makes read.table consume the header line itself, so columns are
# parsed with their real types and the first row no longer has to be dropped
# by hand.
pn17 <- read.table(
  file = "~/brca-infercnv/pn_output_dir_1_norm/17_HMM_predHMMi6.leiden.hmm_mode-subclusters.pred_cnv_genes.dat",
  header = TRUE,
  col.names = c("cell_group_name", "gene_region_name", "state", "gene", "chr", "start", "end")
)
# deletions: one row per cell group carrying that group's summed length
pn17del <- pn17 %>%
  filter(as.numeric(state) < 3) %>%
  mutate(
    # sample name = everything before the first "." in the group name
    `Sample Name` = gsub("\\..*", "", cell_group_name),
    Length = (as.numeric(end) - as.numeric(start)) / 1e6
  ) %>%
  group_by(cell_group_name) %>%
  mutate(`Summed Length` = sum(Length)) %>%
  ungroup() %>%
  distinct(cell_group_name, .keep_all = TRUE)

# extract data from tp meno
tp17 <- read.table(
  file = "~/brca-infercnv/tp_output_dir_05_13/17_HMM_predHMMi6.leiden.hmm_mode-subclusters.pred_cnv_genes.dat",
  header = TRUE,
  col.names = c("cell_group_name", "gene_region_name", "state", "gene", "chr", "start", "end")
)
# deletions
# NOTE(review): this section drops only "N_1105_epi", while the matching state
# section keeps only the samples listed in tp.names -- confirm the two plots
# are meant to show the same set of premenopausal samples.
tp17del <- tp17 %>%
  filter(as.numeric(state) < 3) %>%
  mutate(
    `Sample Name` = gsub("\\..*", "", cell_group_name),
    Length = (as.numeric(end) - as.numeric(start)) / 1e6
  ) %>%
  group_by(cell_group_name) %>%
  mutate(`Summed Length` = sum(Length)) %>%
  ungroup() %>%
  distinct(cell_group_name, .keep_all = TRUE) %>%
  filter(`Sample Name` != "N_1105_epi")

# combine dfs
norm17del <- bind_rows(pn17del, tp17del)

# violin plot
# The jitter width is set through position_jitter(); geom_point() has no
# `width` argument, so the original value was ignored. height = 0 keeps every
# point at its true y value. The original call passed `size` twice; the first
# value (0.1) is kept -- NOTE(review): confirm 0.1 rather than 2 is intended.
normplot <- ggplot(norm17del, aes(x = `Sample Name`, y = `Summed Length`, fill = `Sample Name`)) +
  geom_violin(scale = "width", adjust = 1.5) +
  geom_point(
    position = position_jitter(width = 0.2, height = 0),
    size = 0.1, alpha = 0.7, shape = 21, color = "black"
  ) +
  labs(
    title = "Violin Plot of Copy Number Deletions",
    x = "Preneoplastic Tissue (B1) and Premenopausal Tissue (N)",
    y = "Summed Length of Deletions"
  ) +
  scale_fill_brewer(palette = "Set3") +
  theme_minimal(base_size = 15) +
  # all theme tweaks come after theme_minimal(), which would otherwise reset them
  theme(
    legend.position = "none",
    panel.border = element_rect(fill = NA, color = "black", linewidth = 1.5),
    strip.background = element_rect(fill = "grey90", color = "black", linewidth = 1.5),
    panel.spacing = unit(1, "lines"),
    plot.title = element_text(face = "bold", hjust = 0.5),
    axis.title.x = element_text(face = "bold"),
    axis.title.y = element_text(face = "bold"),
    axis.text.x = element_text(angle = 45, hjust = 1)
  )
# plot
normplot

length: single wilcoxon test
# Two-sided Wilcoxon rank-sum test on the per-group summed deletion length:
# BRCA1 preneoplastic tissue vs human premenopausal tissue.

# preneoplastic groups. All names in pn.names start with "B1", so the genotype
# label is assigned directly after the filter.
pn.names <- c("B1_0894", "B1_0033", "B1_0023", "B1_0090")
pndel <- pn17del %>%
  filter(`Sample Name` %in% pn.names) %>%
  mutate(
    genotype = "BRCA1 Preneoplastic",
    `Summed Length` = as.numeric(`Summed Length`)
  )

# premenopausal groups; all names in tp.names start with "N"
tp.names <- c("N_0019", "N_0233", "N_0092", "N_0093", "N_0123", "N_0064", "N_0169")
tpdel <- tp17del %>%
  filter(`Sample Name` %in% tp.names) %>%
  mutate(
    genotype = "Human Premenopausal",
    `Summed Length` = as.numeric(`Summed Length`)
  )

# wilcox test: preneoplastic vs premenopausal tissue
w.norm.ln <- wilcox.test(pndel$`Summed Length`, tpdel$`Summed Length`, alternative = "two.sided")
w.norm.ln # W = 101848, p-value = 0.9706
##
## Wilcoxon rank sum test with continuity correction
##
## data: pndel$`Summed Length` and tpdel$`Summed Length`
## W = 101848, p-value = 0.9706
## alternative hypothesis: true location shift is not equal to 0